library(tidyverse)
library(readxl)
library(janitor)
path = "files/Excel Challenge 22nd Dec.xlsx"
input = read_excel(path, range = "B3:B7")
test = read_excel(path, range = "D3:G7")
rows = strsplit(input$Problem, "\r\n")[[1]]
process_row = function(row) {
words = unlist(strsplit(row, " "))
non_digits = grep("^[^0-9]+", words, value = TRUE)
digits = grep("^[0-9]+", words, value = TRUE)
n = length(non_digits)
if (n == 2) {
customer = non_digits[1]
item = non_digits[2]
} else if (n == 3) {
customer = paste(non_digits[1:2], collapse = " ")
item = non_digits[3]
} else if (n == 4) {
if (grepl("^[A-Z]\\.$", non_digits[2])) {
customer = paste(non_digits[1:3], collapse = " ")
item = non_digits[4]
} else {
customer = paste(non_digits[1:2], collapse = " ")
item = paste(non_digits[3:4], collapse = " ")
}
}
selling_price = digits[1]
buying_price = digits[2]
return(c(Customer = customer, Item = item, `Selling Price` = selling_price, `Buying Price` = buying_price))
}
processed_data = map_dfr(rows, ~ as.data.frame(t(process_row(.x)), stringsAsFactors = FALSE))
df = as.data.frame(processed_data, stringsAsFactors = FALSE) %>%
rownames_to_column() %>%
select(-1) %>%
mutate(`Selling Price` = as.numeric(`Selling Price`),
`Buying Price` = as.numeric(`Buying Price`))
all.equal(df, test, check.attributes = FALSE)
#> [1] TRUECrispo - Excel Challenge 51 2024
excel-challenges
weekly-exercises
Easy Sunday Excel Challenge

Challenge Description
Easy Sunday Excel Challenge
⭐ Solution Required Problem Customer Item Selling Price Buying Price
Solutions
Logic:
Reads the workbook range needed for the challenge
Builds the intermediate helper columns that drive the final answer
Strengths:
- The R solution stays compact and mirrors the workbook logic closely.
Areas for Improvement:
- The code assumes the workbook layout and named ranges remain stable.
Gem:
- The best part of the solution is choosing a tidy intermediate shape before producing the final answer.
import pandas as pd
import re
path = "files/Excel Challenge 22nd Dec.xlsx"
input = pd.read_excel(path, usecols="B", skiprows=2, nrows=1)
test = pd.read_excel(path, usecols="D:G", skiprows=2, nrows=5)
rows = input.iloc[:, 0].str.split('\n').explode().tolist()
def process_row(row):
words = row.split()
non_digits = [word for word in words if not word.isdigit()]
digits = [word for word in words if word.isdigit()]
n = len(non_digits)
if n >= 2:
customer = non_digits[0]
item = non_digits[1] if len(non_digits) > 1 else ""
if n == 3:
customer = " ".join(non_digits[0:2])
item = non_digits[2]
elif n == 4:
if re.match(r"^[A-Z]\.$", non_digits[1]):
customer = " ".join(non_digits[0:3])
item = non_digits[3]
else:
customer = " ".join(non_digits[0:2])
item = " ".join(non_digits[2:4])
selling_price = digits[0]
buying_price = digits[1]
return {"Customer": customer, "Item": item, "Selling Price": selling_price, "Buying Price": buying_price}
processed_data = pd.DataFrame([process_row(row) for row in rows])
processed_data["Selling Price"] = processed_data["Selling Price"].astype('int64')
processed_data["Buying Price"] = processed_data["Buying Price"].astype('int64')
df = processed_data.reset_index(drop=True)
print(processed_data.equals(test)) # TrueLogic:
Reads the workbook range needed for the challenge
Uses direct text-pattern extraction instead of manual cleanup
Applies the rule iteratively until the output is complete
Strengths:
- The Python version keeps the same rule in a direct pandas-oriented workflow.
Areas for Improvement:
- As with the R version, any workbook layout change would require small adjustments.
Gem:
- The implementation stays close to the stated challenge instead of adding unnecessary complexity.
Difficulty Level
This task is easy to moderate:
- The business rule is readable, but the workbook still needs a few careful transformation steps.